In [1]:
import xlrd
import pandas as pd
import numpy as np
statelist=["AK","AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","US","UT","VA","VT","WA","WI","WV","WY"]
len(statelist)
statelist[44]
Out[1]:
In [7]:
#add CLPRB columns
CLPRB=pd.read_excel("Data/Original Data/more MSN/CLPRB.xlsx")
for j in range(52):
dftemp = pd.read_csv('Data/Data_States/%s.csv' % (statelist[j]))
dftemp.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
dftemp['CLPRB']=0
if j==44: #for state US, missing data of GDP
continue
#else: dftemp.drop(dftemp.index[55],inplace=True) #delete last line of GDP
data = xlrd.open_workbook('Data/Original Data/more MSN/CLPRB.xlsx') # open xlsx file
table = data.sheets()[j] # open sheet j
nrows = table.nrows # get how many lines
for i in range(1,nrows): #cycle in the table
dftemp['CLPRB'][i-1] = table.row_values(i)[3] # columns 4
dftemp.to_csv('Data/Data_States/%s.csv'% (statelist[j]), encoding='utf-8', index=False)
In [13]:
#add CLPRB columns to US state
j=44
CLPRB=pd.read_excel("..\CLPRB.xlsx")
dftemp = pd.read_csv('%s.csv' % (statelist[j]))
dftemp.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
dftemp['CLPRB']=0
data = xlrd.open_workbook('..\CLPRB.xlsx') # open xlsx file
table = data.sheets()[j] # open sheet j
nrows = table.nrows # get how many lines
for i in range(1,nrows): #cycle in the table
dftemp['CLPRB'][i-1] = table.row_values(i)[3] # columns 4
dftemp.to_csv('%s.csv'% (statelist[j]), encoding='utf-8', index=False)
In [6]:
#add EMFDB columns
EMFDB=pd.read_excel("Data/Original Data/more MSN/EMFDB.xlsx")
for i in range(52):
tempdf=EMFDB[EMFDB.StateCode=='%s'% (statelist[i])]
del tempdf['MSN']
del tempdf['StateCode']
df = pd.read_csv('Data/Data_States/%s.csv' % (statelist[i]))
df_r=pd.merge(df, tempdf,on='Year',how='outer')
df_r.rename(columns={'Data':'EMFDB'}, inplace = True)
df_r.to_csv('Data/Data_States/%s.csv'% (statelist[i]), encoding='utf-8', index=False)
In [16]:
#add ENPRP columns
ENPRP=pd.read_excel("..\ENPRP.xlsx")
for i in range(52):
tempdf=ENPRP[ENPRP.StateCode=='%s'% (statelist[i])]
del tempdf['MSN']
del tempdf['StateCode']
df = pd.read_csv('%s.csv' % (statelist[i]))
df_r=pd.merge(df, tempdf,on='Year',how='outer')
df_r.rename(columns={'Data':'ENPRP'}, inplace = True)
df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)
In [17]:
#add NGMPB columns
NGMPB=pd.read_excel(open("..\\NGMPB.xlsx",'rb'))
for i in range(52):
tempdf=NGMPB[NGMPB.StateCode=='%s'% (statelist[i])]
del tempdf['MSN']
del tempdf['StateCode']
df = pd.read_csv('%s.csv' % (statelist[i]))
df_r=pd.merge(df, tempdf,on='Year',how='outer')
df_r.rename(columns={'Data':'NGMPB'}, inplace = True)
df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)
In [18]:
#add PAPRB columns
PAPRB=pd.read_excel("..\PAPRB.xlsx")
for i in range(52):
tempdf=PAPRB[PAPRB.StateCode=='%s'% (statelist[i])]
del tempdf['MSN']
del tempdf['StateCode']
df = pd.read_csv('%s.csv' % (statelist[i]))
df_r=pd.merge(df, tempdf,on='Year',how='outer')
df_r.rename(columns={'Data':'PAPRB'}, inplace = True)
df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)
In [ ]: